* ---------------------------------------------------------------------------
* Session setup: clear memory, raise the variable limit, pin the interpreter
* version, close any log left open by a previous run, and disable paging.
* ---------------------------------------------------------------------------
clear all
set maxvar 30000
version 14
capture log close
set more off

****************************************************************************************************
* -----   Customize the paths and options:   ----- 
****************************************************************************************************
*cd  "Data\"

* Alternative machine (kept for reference):
*global MY_IN_PATH   "C:\Users\benjamin.balsmeier\Dropbox\RnD_tax_credit\Data"
*global MY_OUT_PATH  "C:\Users\benjamin.balsmeier\Dropbox\RnD_tax_credit\Data"
*global MY_TEMP_PATH "..."

* Active input / output / table directories.
global MY_IN_PATH   "C:\Users\Joel Stiebale\Dropbox\RnD_tax_credit\Data"
global MY_OUT_PATH  "C:\Users\Joel Stiebale\Dropbox\RnD_tax_credit\Data"
global TABLE_PATH  "C:\Users\Joel Stiebale\Dropbox\RnD_tax_credit\Tables"

* Replication-package location (kept for reference):
*global MY_IN_PATH   "C:\Users\Joel Stiebale\Dropbox\RnD_tax_credit\ReStat_repl_package\Data"
*global MY_OUT_PATH  "C:\Users\Joel Stiebale\Dropbox\RnD_tax_credit\ReStat_repl_package\Data"

* Derived file names. These must be defined AFTER MY_OUT_PATH: a global is
* expanded when it is assigned, so building them earlier produced the bare
* names "out.dta" and "cr_out.log". A path separator is added, and the values
* are quoted because the directory name contains a blank.
global MY_OUT_FILE  "${MY_OUT_PATH}/out.dta"
global MY_LOG_FILE  "${MY_OUT_PATH}/cr_out.log"


****************************************************************************************************
* import CS data
*************************************************************************************************

* Patent-class count / HHI file: keep only gvkey-ayear cells that occur exactly
* once, make gvkey numeric, rename ayear to year, and park the result in a
* tempfile for a later merge.
use "${MY_IN_PATH}/patents_class_count_hhi_gvkey.dta", clear
bysort gvkey ayear: generate i = _N
drop if i != 1
rename ayear year
destring gvkey, replace
tempfile pathhi
save `pathhi'


* Load the Compustat extract.
use "${MY_IN_PATH}/Compustat_complete1950to201511_selecteditems_n.dta", clear

* Attach the CPI by year; only years present in both files are kept.
merge m:1 year using "${MY_IN_PATH}/cpi.dta", keep(match) nogenerate
summarize cpi

* Rescale the index by 100 and inspect it again.
replace cpi = cpi/100
summarize cpi

* Log of CPI-deflated R&D; rows where the log is missing are set to zero.
generate lrd_def = log(xrd/cpi)
replace lrd_def = 0 if lrd_def == .


* merge new tech prox measure 
* Technological-proximity measures (firm-year). Rows that exist only in the
* using file are discarded.
destring gvkey, replace
merge 1:1 gvkey year using "${MY_IN_PATH}/class_tech_proximity_maria/techprox_newptas20221003_pdate.dta", ///
    keep(master match) nogenerate

* Patent-class counts / HHI from the tempfile saved earlier in this do-file.
merge 1:1 gvkey year using `pathhi', keep(master match) nogenerate

* Firm-years without patent information get zeros instead of missing values.
recode *uspc (.=0)

foreach pv of varlist no_new_cl5_2020 npat_new5_50_2020 npat2020 npat_old5_2020 {
    replace `pv' = 0 if `pv' == .
}

generate tp5y  = 1 - tp_raw5_2020
generate lnpat = log(npat2020 + 1)


* Map state abbreviations to FIPS codes. statastates leaves a _merge variable
* behind (it is used on the next line); only matched states are kept.
statastates, abbreviation(state)
keep if _merge == 3
drop _merge

ren state_fips fips

* merge wilson tax credits
* The quoted file name previously ended in a blank ("...det.dta "), which is
* not the name of the file and fails on file systems that do not silently
* strip trailing blanks. The blank is removed here.
merge n:1 fips year using "${MY_IN_PATH}/rd_tax_credit_det.dta"
keep if _merge ==3 
drop _merge

* Merge CPI
* NOTE(review): cpi is already in memory from the earlier CPI merge and has
* been divided by 100. Without the update option merge keeps the master's
* values for variables present in both files, so this step should leave cpi
* unchanged -- confirm whether cpi.dta carries other variables that are needed.
merge m:1 year using "${MY_IN_PATH}/cpi.dta", nogenerate keep(master matched)	


*******************************************************************************************************************
* Sample preparation
*******************************************************************************************************************
drop h hh hhh hhhh

* Missing R&D counts as zero; negative R&D is dropped.
replace xrd = 0 if xrd == .
drop if xrd < 0

* One record per firm-year: the first one when sorted by datadate.
bysort gvkey year (datadate): keep if _n == 1

destring sic, replace
drop if inrange(sic, 6000, 6999) // drop financials

drop if year > 2006

* only firms that patent: drop firms whose patent count sums to zero
gegen h = sum(npat2020), by(gvkey)
keep if h != 0
drop h

* CPI-deflated R&D (two names for the same series)
generate rd_d  = xrd/cpi
generate xrd_d = rd_d

* First year with a positive, non-missing tax credit rate in each state;
* states that never have one get 0.
egen _first_tax = min(year) if rd_efr_hi>0 & rd_efr_hi<., by(fips)
replace _first_tax = 0 if _first_tax == .

egen first_tax = max(_first_tax), by(fips)

* Remaining missing patent counts become zeros.
recode npat*old* (.=0)
recode npat*new* (.=0)

* Old-technology patents capped at 1000; total = new-technology + capped old.
generate no_patold_cl_1000 = min(1000, npat_old5_50_2020) if npat_old5_50_2020 != .
generate npat_alt_1000     = npat_new5_50_2020 + no_patold_cl_1000

* Shares of new-class / new-technology patents in the total.
generate fr_npat_new5_w = npat_new5_2020 / npat_alt_1000
capture drop fr_new
generate fr_new = npat_new5_50_2020 / npat_alt_1000

* Patent dummies
generate dpalt     = (npat_alt_1000 > 0)
generate dpnew     = (npat_new5_2020 > 0)
generate dpnewtech = (npat_new5_50_2020 > 0)
generate dpold     = (no_patold_cl_1000 > 0)

capture drop xrd_1000
* Capped R&D.
* NOTE(review): the original comment says "winsorized at 1000", but the code
* caps xrd_d*100 at 1000000 -- confirm the intended scale.
generate xrd_1000 = min(1000000, xrd_d*100) if xrd_d != .

* R&D-performer dummy, missing when xrd_d is missing.
generate drd = (xrd_d > 0 & xrd_d < .) if xrd_d != .

generate sid = fips

capture drop l0rd_efr_hi
generate l0rd_efr_hi = rd_efr_hi

* no tax dummy, no profit dummies (missing when the underlying item is missing)
generate no_tax    = (txt <= 0)      if txt  != .
generate no_ebit   = (ebit/cpi <= 0) if ebit != .
generate no_profit = (gp/cpi <= 0)   if gp   != .

rename count_uspc uspc_count

* Aggregate profit and tax variables to the firm-level using observations before the first tax credit (1982)
* For each indicator x (no_tax, no_ebit, no_profit) build:
*   total_x_i : firm-level sum of x over years before 1982, copied to all rows of the firm
*   av_x      : mean of x over pre-1982 observations of the sic code, copied to all rows of that sic
*   Av_x      : av_x adjusted to take out the focal firm (leave-one-out style)
*   dhi_x     : 1 if Av_x is above its sample median, 0 otherwise, missing if Av_x is missing
foreach x in  no_tax  no_ebit  no_profit {
* firm-level pre-1982 sum, then spread to every row of the firm
gegen _total_`x'_i = sum(`x') if year<1982, by(gvkey)
gegen total_`x'_i  = max(_total_`x'_i ), by(gvkey)
drop _total_`x'_i 
* industry (sic) pre-1982 mean, then spread to every row of the industry
gegen av_`x' = mean(`x') if year<1982, by(sic)
gegen av_`x'_ = min(av_`x'), by(sic)
replace av_`x'  = av_`x'_
drop av_`x'_ 
* helper counters are rebuilt on every pass; note that the pattern count* removes
* every variable whose name starts with "count"
capture drop c count*
* c flags rows with a non-missing industry average; count and count_i are the
* number of such rows in the sic code and in the firm, respectively
gen c = av_`x'!=.
egen count=sum(c), by(sic)
egen count_i = sum(c), by(gvkey)
* firm-level sum of x over ALL years in memory (no year restriction here)
egen `x'_i = sum(`x'), by(gvkey)
gen Av_`x' = av_`x'
* leave-one-out adjustment: (av * count - firm sum) / (count - count_i)
* NOTE(review): av is a pre-1982 mean while count, count_i and the firm sum are
* taken over all years -- confirm that mixing the two windows is intended.
* When count equals count_i the division yields missing.
replace Av_`x' = av_`x' * count / (count-count_i)  - `x'_i  / (count-count_i) if `x'_i !=.
* median split on the adjusted average; r(p50) comes from the summarize just above
sum Av_`x', d
gen dhi_`x' = 1 if Av_`x' > r(p50) & Av_`x' !=.
replace dhi_`x' = 0 if dhi_`x' ==. & Av_`x' !=.
sum dhi_`x'
}

* Number of pre-1982 observations per sic code (n); only used by the
* commented-out filter below.
gegen h = count(1) if year < 1982, by(sic)
gegen n = min(h), by(sic)
*sum n,d  
*drop if n < 10

* Two- and three-digit SIC codes as strings. sic is numeric here, so a plain
* string(sic) drops the leading zero of codes below 1000 (e.g. 0100 -> "100"),
* which turned SIC 01xx into "10" and SIC 02xx into "20". The code is now
* zero-padded to four digits first. A missing sic keeps the former result "."
* so that later string comparisons and destring behave as before.
gen sic2 = cond(missing(sic), ".", substr(string(sic, "%04.0f"),1,2))
gen sic3 = cond(missing(sic), ".", substr(string(sic, "%04.0f"),1,3))

* Firm-level profit uncertainty from pre-1982 years: standard deviation of
* deflated gross profit relative to the absolute value of its mean.
egen _av_profit_i = mean(gp/cpi) if year<1982, by(gvkey)
egen _sd_profit_i = sd(gp/cpi) if year<1982, by(gvkey)
egen av_profit_i = max(_av_profit_i), by(gvkey)
egen sd_profit_i = max(_sd_profit_i), by(gvkey)
gen unc_i = sd_profit_i / abs(av_profit_i)
* Industry average over all rows of the three-digit SIC code.
egen unc_j = mean(unc_i), by(sic3)

* Unc_j takes the focal firm out of the uncertainty measure:
* average uncertainty by industry is: unc_j = 1/N  \sum_{i=1}^N unc_i
* we want to measure uncertainty without firm i: Unc_j = (unc_j) * N/(N-K) - unc_i *K / (N-K) 
* where N is the number of non-missing observations in the industry and K is the
* number of non-missing observations for firm i.
capture drop c count*
gen c = unc_i!=.
* N must be counted in the same industry cell as unc_j (sic3). It was counted
* by four-digit sic before, which does not match the average being adjusted.
egen count=sum(c), by(sic3)
egen count_i = sum(c), by(gvkey)
gen Unc_j = unc_j
replace Unc_j = unc_j * count / (count-count_i)  - unc_i *count_i / (count-count_i) if unc_i!=.



* Estimation window starts in 1977.
drop if year < 1977

la var xrd_1000 "R&D"
la var npat_alt_1000 "Patents"
la var no_patold_cl_1000 "Old tech"
la var npat_new5_2020 "New class"
la var npat_new5_50_2020 "New tech"
la var tp_raw5_2020 "tech.proximity"
la var dpold "Doldtech"
la var dpnew "Dnewclass"
la var dpnewtech "Dnewtech"
la var xrd_d "R&D"


* treatment dummy
capture drop treat
capture drop l1_treat

* Missing values compare as larger than any number in Stata, so a bare "x>0"
* codes a missing tax rate as treated. The dummies are now missing whenever
* the underlying rate is missing (same guard as used for _first_tax).
gen treat = rd_efr_hi>0 if rd_efr_hi<.
la variable treat "Tax credit event"

gen l1_treat = l1rd_efr_hi>0 if l1rd_efr_hi<.

* Controls. lass and lnsale are computed BEFORE at and sale are winsorized
* (order kept as in the original).
gen roa = ebit / at
gen lass = ln(at)	
gen lnsale = ln(sale)

* Winsorize at the 1st / 99th percentile. The upper replacement excludes
* missing values: "v > r(p99)" is true for missing v, which previously
* overwrote every missing roa/at/sale with the 99th percentile.
foreach v in roa at sale {
sum `v', d
replace `v' = r(p1) if `v'<r(p1) 
replace `v' = r(p99) if `v'>r(p99) & `v'<.
}




* Use only significant changes: in WI, ND, IA, CA and IN the lagged treatment
* dummy is switched off before 1991.
capture drop l1_streat
generate l1_streat = l1_treat
replace l1_streat = 0 if inlist(state, "WI", "ND", "IA", "CA", "IN") & year < 1991

global cohort2 first_taxB	

* Alternative cohort variable: same five states are assigned cohort 1990.
capture drop first_taxB
generate first_taxB = first_tax
replace first_taxB = 1990 if inlist(state, "WI", "IN", "IA", "CA", "ND")

* Settings for the binary-treatment and event-study specifications:
* individual & time FE, cluster variable
global idcode gvkey 
global year year
global cluster sid
* cohort variable (non-treated = 0)
global cohort first_tax
* first pre-treatment and last post-treatment period
global first 5 
global last  15

* Group indicators for states with a first credit up to 1991, split into
* years without / with the credit, and the user-cost variable.
egen treatgroup = max(treat) if first_tax<=1991, by(sid)
generate treat_before = treatgroup*(1-treat)
generate treat_after  = treatgroup*treat
generate usercost     = rho_high - 1

* Start the log for the appendix tables.
capture log close

log using "$MY_OUT_PATH\AppendixTablesSep2024_9.log", replace
				
****************************************************************************************************************************************
****************************************************************************************************************************************
* Appendix tables: 
****************************************************************************************************************************************
****************************************************************************************************************************************

****************************************************************************************************************************************
* TABLE: Binary tax credit events: Robustness using the first tax credit change 
****************************************************************************************************************************************

* Settings for the binary treatment / event-study regressions:
* cohort variable (non-treated = 0), first pre- and last post-treatment period.
global cohort first_tax
global first 5 
global last  15

*global eventsample first_tax<=1991 & ((year>=$cohort - $first & year<= $cohort + $last) | $cohort==0)
* Only tax credit introductions until 1991 are used, with up to 5 years before
* and up to 15 years after the introduction; never-treated states are kept.

********************************************************************************
* Table A1, panel A
********************************************************************************
* Poisson regressions of each outcome on the lagged treatment dummy with firm
* and year fixed effects, clustered by state. Results are stored as ba..bd and
* the pseudo R2 is kept in ra_<outcome>.
local window "first_tax<=1991 & ((year>=$cohort - $first & year<= $cohort + $last) | $cohort==0)"
local names  "ba bb bc bd"
local j = 0
foreach y in xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
    local ++j
    local nm : word `j' of `names'
    ppmlhdfe `y' l1_treat if `window', absorb(gvkey year) cluster(sid)
    estimates store `nm'
    scalar ra_`y' = e(r2_p)
}

esttab ba bb bc bd, replace se b(%10.3f) starlevels(* 0.10 ** 0.05 *** 0.01) noobs ///
    drop(_cons) ///
    title("Tax credits, R&D and patenting, Poisson / exponential mean regression") ///
    label type nogap compress noeqli ///
    addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") ///
    coeflabels(l1_treat "TaxCreditEvent")

* Number of observations and firms for Table A1, panel A, plus the pseudo R2
* saved after the corresponding ppmlhdfe run.
* Fixes: (1) the first display line referenced an undefined local v and printed
* an empty outcome name -- it now uses the loop variable y; (2) the auxiliary
* regression now carries the same event-window restriction as the panel A
* estimations, so that the reported N refers to the same sample.
* NOTE(review): ppmlhdfe may additionally drop singletons / separated
* observations; e(N) of the stored estimates is the exact estimation N.
foreach y in xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'   l1_treat  if first_tax<=1991 & ((year>=$cohort - $first & year<= $cohort + $last) | $cohort==0), cluster(sid)  
gen fullsample_a=e(sample)
* Nsample is constant across rows and equals the number of used observations
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `y' : `r(max)'"
* consecutive firm ids within the sample; the largest id is the firm count
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" ra_`y' 
}	
					
********************************************************************************
															 
* Relative time: years since the cohort's first tax credit.
capture drop ry
generate ry = year - $cohort

* Lead dummies g_1 ... g_$first (k years before the event).
forvalues k = 1/$first {
    capture drop g_`k'
    generate g_`k' = (ry == -`k')
    label variable g_`k' "-`k'"
}

* Lag dummies g0 ... g$last (k years after the event).
forvalues k = 0/$last {
    capture drop g`k'
    generate g`k' = (ry == `k')
    label variable g`k' "`k'"
}

* Reset the regressor lists before rebuilding them.
foreach m in varlistpre varlistpost varlistpostl vpre vpost vpostl {
    global `m'
}

* Pre-period terms: cohort-specific leads from $first down to 2.
forvalues k = $first (-1)2 {
    global varlistpre $varlistpre i.$cohort#c.g_`k'
    global vpre $vpre g_`k'
}

* Post-period terms: "post" starts at period 0, "postl" at period 1.
forvalues k = 0/$last {
    global varlistpost $varlistpost i.$cohort#c.g`k'
    global vpost $vpost g`k'
    if `k' >= 1 {
        global varlistpostl $varlistpostl i.$cohort#c.g`k'
        global vpostl $vpostl g`k'
    }
}
					

********************************************************************************
* Table A1, panel B
********************************************************************************
				
* Table A1, panel B: cohort-by-period Poisson regressions; the cohort-period
* coefficients are aggregated into one effect (att) as a weighted sum, with
* weights equal to each cell's share of treated post-period observations.
* Changes relative to the earlier version (results are unchanged):
*   - levelsof does not depend on the period dummy, so it is run once per
*     outcome instead of once per period;
*   - a count whose result was never read has been removed.
foreach y in xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
    qui ppmlhdfe `y'    $varlistpostl  if first_tax<=1991 & ((year>=$cohort - $first & year<= $cohort + $last) | $cohort==0) , ///
        absorb(gvkey year) vce(cluster sid) keepsingle 

    scalar rb_`y' = e(r2_p)
    capture drop wsample
    gen wsample = e(sample)

    qui {
        * N: treated observations after the event year inside the estimation sample
        count if wsample==1 & year>$cohort & $cohort!=0
        local N = r(N)
        * cohorts that contribute post-event observations
        levelsof $cohort if wsample==1 & year>$cohort & $cohort!=0, local(level)
        * build the expression 0 + b[cohort x period] * cell size / N + ...
        local att 0
        foreach v in  $vpostl  {
            foreach w in `level' {
                count if wsample ==1 & `v'==1 & $cohort ==`w'
                local s = r(N)
                local att `att' + _b[`w'.$cohort#c.`v']*`s' /`N'
            }
        }
    }
    * delta-method standard error for the aggregate; posted so esttab can read it
    nlcom (att: `att' ) , post
    est store e_`y'					
}					


* NOTE(review): this table sits under the "Table A1, panel B" header but its
* title string starts with "Panel A" -- confirm the intended label.
esttab e_xrd_d e_npat_alt_1000 e_no_patold_cl_1000 e_npat_new5_50_2020 , replace  se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  noobs ///
					title("Panel A: Tax credits, R&D and patenting, Wooldridge estimator")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") coeflabels(l1rd_efr_hi "TaxCreditRate"  att "TaxCreditEvent" ) 				

* generate counts for number of observations and firms
* Number of observations and firms for Table A1, panel B, plus the pseudo R2
* saved after the corresponding ppmlhdfe run.
* Fixes: (1) the first display line referenced an undefined local v -- it now
* uses the loop variable y; (2) the event window was defined with $cohort2
* although the panel B estimations and the regressors in $varlistpostl are
* built from $cohort; the window now uses $cohort as well.
foreach y in xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'  $varlistpostl  if first_tax<=1991 & ((year>=$cohort - $first & year<= $cohort + $last) | $cohort==0) , cluster(sid)  
gen fullsample_a=e(sample)
* Nsample is constant across rows and equals the number of used observations
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `y' : `r(max)'"
* consecutive firm ids within the sample; the largest id is the firm count
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" rb_`y' 
}									


****************************************************************************************************************************************
* TABLE: Interaction with Cohen Measure of Patent Effectiveness
****************************************************************************************************************************************

* Work on a copy of the data: everything up to the matching restore is undone.
preserve

*Prepare isic variable for match with cohen data
* sic codes from https://www.osha.gov/pls/imis/sic_manual.html, 2020 09 07
* The assignments below are order-dependent: broad two-digit rules come first
* and later, narrower rules (three-digit sic3 or exact sic) overwrite them.
gen isic = .
replace isic = 1500 if sic2 == "20" // Food And Kindred Products
replace isic = 1700 if sic2 == "22" | sic2 == "23" // Textile Mill Products, Apparel And Other Finished Products Made From Fabrics And Similar Materials
replace isic = 2100 if sic2 == "26" // Paper And Allied Products
replace isic = 2200 if sic2 == "27" // Printing, Publishing, And Allied Industries
replace isic = 2320 if sic2 == "29" // Petroleum Refining And Related Industries
replace isic = 2400 if sic2 == "28" // Chemicals And Allied Products
replace isic = 2413 if sic3 == "282" //  Plastics Materials And Synthetic Resins, Synthetic
replace isic = 2423 if sic3 == "283" // Drugs
replace isic = 2429 if sic3 == "289" // Miscellaneous Chemical Products
replace isic = 2500 if sic2 == "30" // Rubber And Miscellaneous Plastics Products
replace isic = 2600 if sic2 == "32" // Stone, Clay, Glass, And Concrete Products
replace isic = 2610 if sic3 == "322"  | sic3 == "323"  //  Glass And Glassware, Pressed Or Blown ,, Glass Products, Made Of Purchased Glass
replace isic = 2695 if sic3 == "324" // Cement, Hydraulic
replace isic = 2700 if sic2 == "33" //  Primary Metal Industries
replace isic = 2710 if sic3 == "331" | sic3 == "332"  // Steel Works, Blast Furnaces, And Rolling And Finishing Mills, Iron And Steel Foundries
replace isic = 2800 if sic2 == "34" //  Fabricated Metal Products, Except Machinery And Transportation Equipment
replace isic = 2910 if sic3 == "356" // General Industrial Machinery And Equipment
replace isic = 2920 if sic3 == "355" // Special Industry Machinery, Except Metalworking
replace isic = 2922 if sic3 == "354" // Metalworking Machinery And Equipment
replace isic = 3010 if sic3 == "357" // Computer And Office Equipment
replace isic = 3100 if sic2 == "36" // Electronic And Other Electrical Equipment And Components, Except Computer Equipment
*replace isic = 3110 if sic3 == "371" // 
replace isic = 3210 if sic3 == "367" // Electronic Components And Accessories
replace isic = 3211 if sic == 3674 // Semiconductors and Related Devices
replace isic = 3220  if sic3 == "366" // Communications Equipment
replace isic = 3230 if sic == 3663 // Radio and Television Broadcasting and Communications Equipment
replace isic = 3311 if sic3 == "384" //Surgical, Medical, And Dental Instruments And Supplies
*replace isic = 3312 if sic2 == "" //
replace isic = 3314  if sic3 == "381" // Search, Detection, Navigation, Guidance, Aeronautical, and Nautical Systems, Instruments, and Equipment
replace isic = 3410 if sic3 == "371" // Motor Vehicles And Motor Vehicle Equipment
replace isic = 3430 if sic == 3714 // Motor Vehicle Parts and Accessories
replace isic = 3530 if sic3 == "372" // Aircraft And Parts
* NOTE(review): the next line only matches rows whose sic2 is the empty string;
* it looks like an unfinished placeholder -- confirm whether a code is missing.
replace isic = 3600 if sic2 == "" //
* Other manufacturing
replace isic = 3600 if sic2 == "39" //  Miscellaneous Manufacturing Industries


* Attach the table keyed by isic; rows that exist only in the using file are
* dropped, unmatched firm-years are kept.
merge n:1 isic using "${MY_IN_PATH}/cms_2000_table1.dta"
drop if _merge == 2
drop _merge


********************************************************************************
** Table A2: High vs. low appropriation risk // importance of patents
********************************************************************************
** from CNW : Table 1: Mean Percentage of Product Innovations for which Mechanism Considered Effectiv

drop if year < 1977

* Median split on the variable patents: dhi = 1 above the median, 0 at or below,
* missing when patents is missing.
summarize patents, detail
generate dhi = (patents > r(p50)) if patents != .
summarize dhi*

* Interaction of the lagged tax credit rate with the high-effectiveness dummy.
capture drop l1rd_efr_hi_dhi
generate l1rd_efr_hi_dhi = l1rd_efr_hi*dhi

* Poisson regressions with firm and year fixed effects, clustered by state.
* Estimates are stored as a..d; the pseudo R2 goes to rb_<outcome>.
local names "a b c d"
local j = 0
foreach y in xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
    local ++j
    local nm : word `j' of `names'
    ppmlhdfe `y' l1rd_efr_hi l1rd_efr_hi_dhi if first_tax<=1991, absorb(gvkey year) cluster(sid)
    estimates store `nm'
    scalar rb_`y' = e(r2_p)
}

esttab a b c d , replace pr2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01) noobs  ///
    keep (  l1rd_efr_hi l1rd_efr_hi_dhi) ///
    title("Tax credits, R&D and patenting by patent effectivness")  label type nogap compress noeqli ///
    addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") ///
    coeflabels(l1rd_efr_hi "Tax credit t-1" l1rd_efr_hi_dhi "TaxCredit*HighEffective")

* Number of observations and firms for Table A2, plus the pseudo R2 saved
* after the corresponding ppmlhdfe run.
* Fixes: (1) the first display line referenced an undefined local v -- it now
* uses the loop variable y; (2) the auxiliary regression now includes the
* interaction l1rd_efr_hi_dhi, as the Table A2 estimations do, so rows where
* the interaction is missing are no longer counted.
foreach y in xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'   l1rd_efr_hi l1rd_efr_hi_dhi  if first_tax<=1991, cluster(sid)  
gen fullsample_a=e(sample)
* Nsample is constant across rows and equals the number of used observations
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `y' : `r(max)'"
* consecutive firm ids within the sample; the largest id is the firm count
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" rb_`y' 
}	
 										
* Return to the data as they were at the preceding preserve.
restore
									
****************************************************************************************************************************************
* TABLE A3: R&D tax credits across all states
****************************************************************************************************************************************

* results for the whole sample (incl. post 91 tax changes without NJ)			
		
* Whole sample (including post-1991 tax changes) without New Jersey: Poisson
* regressions with firm and year fixed effects, clustered by state. Estimates
* are stored as a1..d1; the pseudo R2 goes to rb_<outcome>.
local names "a1 b1 c1 d1"
local j = 0
foreach y in xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
    local ++j
    local nm : word `j' of `names'
    ppmlhdfe `y' l1rd_efr_hi if state!="NJ", absorb(gvkey year) cluster(sid)
    estimates store `nm'
    scalar rb_`y' = e(r2_p)
}

esttab a1 b1 c1 d1 , replace b(3) se(3) starlevels(* 0.10 ** 0.05 *** 0.01) ///
    keep (l1rd_efr_hi) sfmt(%10.0f) ///
    title("Full sample, Poisson") compress label coeflabels(l1rd_efr_hi "Tax credit t-1")
				
* Number of observations and firms for the sample without New Jersey, plus the
* pseudo R2 saved after the corresponding ppmlhdfe run.
* Fix: the first display line referenced an undefined local v and printed an
* empty outcome name; it now uses the loop variable y.
foreach y in xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'   l1rd_efr_hi  if state!="NJ", cluster(sid)  
gen fullsample_a=e(sample)
* Nsample is constant across rows and equals the number of used observations
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `y' : `r(max)'"
* consecutive firm ids within the sample; the largest id is the firm count
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" rb_`y' 
}	
				
	
* Full sample (all states): Poisson regressions with firm and year fixed
* effects, clustered by state. Estimates overwrite a1..d1; the pseudo R2 goes
* to ra_<outcome>. Only the first regression is run with the extra option d.
* NOTE(review): d presumably makes ppmlhdfe save the sum of the fixed effects
* -- confirm against the ppmlhdfe help file.
local names "a1 b1 c1 d1"
local j = 0
foreach y in xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
    local ++j
    local nm : word `j' of `names'
    local savefe
    if `j' == 1 local savefe d
    ppmlhdfe `y' l1rd_efr_hi, absorb(gvkey year) cluster(sid) `savefe'
    estimates store `nm'
    scalar ra_`y' = e(r2_p)
}

esttab a1 b1 c1 d1 , replace pr2 b(3) se(3) starlevels(* 0.10 ** 0.05 *** 0.01) noobs ///
    keep (l1rd_efr_hi) sfmt(%10.0f) ///
    title("Full sample, Poisson") compress label coeflabels(l1rd_efr_hi "Tax credit t-1")

* Number of observations and firms for the full sample, plus the pseudo R2
* saved after the corresponding ppmlhdfe run.
* Fix: the first display line referenced an undefined local v and printed an
* empty outcome name; it now uses the loop variable y.
foreach y in xrd_d npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
qui poisson `y'   l1rd_efr_hi , cluster(sid)  
gen fullsample_a=e(sample)
* Nsample is constant across rows and equals the number of used observations
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `y' : `r(max)'"
* consecutive firm ids within the sample; the largest id is the firm count
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" ra_`y' 
}		
							
****************************************************************************************************************************************
* TABLE A4: alternative functional forms
****************************************************************************************************************************************
* Panel A, OLS in levels
********************************************************************************
* Table OLS R&D tax credit rate and innovation
* R&D also winsorized at 1000
* Rebuild the capped R&D variable: deflated R&D capped at 1000.
capture drop xrd_1000
sum xrd_d, d
gen xrd_1000 = min(1000, xrd_d) if xrd_d!=.

* OLS in levels with firm and year fixed effects, clustered by state.
* Fix: the fit statistic was read from e(r2_p), which reghdfe does not post,
* so every ra_* scalar set here was missing. e(r2) is stored instead; this is
* the statistic the r2 option of the esttab call below reports.
reghdfe xrd_1000 l1rd_efr_hi if first_tax<=1991, absorb(gvkey year) cluster(sid)
estimates store a1
sum xrd_1000 if e(sample)
scalar ra_xrd_1000 = e(r2)
reghdfe npat_alt_1000 l1rd_efr_hi if first_tax<=1991, absorb(gvkey year) cluster(sid)					
estimates store b1
scalar ra_npat_alt_1000 = e(r2)
sum npat_alt_1000 if e(sample)
* patsample marks the estimation sample of the patent regression; it is reused
* to restrict later regressions.
capture drop patsample
gen patsample = e(sample)
reghdfe  no_patold_cl_1000  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
est store c1
scalar ra_no_patold_cl_1000 = e(r2)
sum no_patold_cl_1000 if e(sample)
reghdfe   npat_new5_50_2020  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
est store d1		
scalar ra_npat_new5_50_2020 = e(r2)
sum npat_new5_50_2020 if e(sample)

* Descriptives of the tax credit rate within the patent sample.
sum rd_efr_hi if patsample, d
sum rd_efr_hi if patsample & rd_efr_hi>0, d
* NOTE(review): "& l1rd_efr_hi" is true for any non-zero value, including
* missing -- confirm whether "l1rd_efr_hi>0 & l1rd_efr_hi<." was intended.
sum rd_efr_hi if patsample & rd_efr_hi>0 & l1rd_efr_hi, d

esttab a1 b1 c1 d1 ,	replace r2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)    noobs  ///
					keep (l1rd_efr_hi) sfmt(%10.0f) ///
					title("Tax credits, R&D and patenting, OLS in levels")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") coeflabels(l1rd_efr_hi "Tax credit, t-1") 				


					
* Dummies: linear probability models for the R&D and patent indicators, on the
* patent-regression sample, with firm and year fixed effects, clustered by state.
* Fixes: (1) after the dpnewtech regression the summary was taken of dpnew, a
* different variable -- it now summarizes dpnewtech, matching the pattern of
* the other three regressions; (2) reghdfe does not post e(r2_p), so the ra_*
* scalars were missing -- e(r2) is stored instead.
reghdfe  drd  l1rd_efr_hi if first_tax<=1991 & patsample==1, cluster(sid) absorb(gvkey year)					
est store a3
scalar ra_drd = e(r2)
sum drd if e(sample)
reghdfe  dpalt  l1rd_efr_hi if first_tax<=1991 & patsample==1, cluster(sid) absorb(gvkey year)					
est store b3
scalar ra_dpalt = e(r2)
sum dpalt if e(sample)
reghdfe  dpold  l1rd_efr_hi if first_tax<=1991 & patsample==1, cluster(sid) absorb(gvkey year)					
est store c3
scalar ra_dpold = e(r2)
sum dpold if e(sample)
reghdfe  dpnewtech  l1rd_efr_hi if first_tax<=1991 & patsample==1, cluster(sid) absorb(gvkey year)					
est store d3
scalar ra_dpnewtech = e(r2)
sum dpnewtech if e(sample)

esttab a3 b3 c3 d3 ,	r se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  ///
					keep (l1rd_efr_hi) ///
					order (l1rd_efr_hi) title("Tax credits, R&D and patenting, alternative indicators, OLS regressions")  label	type nogap compress noeqli ///
					addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") 				
					
		

* Declare the panel structure needed by xtlogit.
tsset gvkey year

* Dummies with fixed-effects logit and year dummies on the patent-regression
* sample. Estimates overwrite a3..d3; the pseudo R2 goes to ra_<outcome>.
* NOTE(review): the xtlogit calls carry no vce() option, while the table note
* below states that standard errors are clustered at the state level.
local names "a3 b3 c3 d3"
local j = 0
foreach y in drd dpalt dpold dpnewtech {
    local ++j
    local nm : word `j' of `names'
    xtlogit `y' l1rd_efr_hi i.year if first_tax<=1991 & patsample==1, fe
    estimates store `nm'
    summarize `y' if e(sample)
    scalar ra_`y' = e(r2_p)
}

esttab a3 b3 c3 d3 , pr2 replace se b(%10.3f) starlevels(* 0.10 ** 0.05 *** 0.01) noobs sfmt(%10.0f) ///
    keep (l1rd_efr_hi) ///
    order (l1rd_efr_hi) title("Tax credits, R&D and patenting, alternative indicators, Logit FE regressions")  label type nogap compress noeqli ///
    addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs")
																	
					
* alternative: logs for positive values (ln of zero is missing, so firm-years
* with a zero outcome drop out of the respective regression)
gen ln_rd = ln(xrd)
gen ln_no_patnew_class_1000 = ln(npat_new5_2020)
gen ln_no_patnew_cl_1000 = ln(npat_new5_50_2020)
gen ln_no_patold_cl_1000 = ln(no_patold_cl_1000)
gen ln_npat_alt_1000 = ln(npat_alt_1000)

la var ln_rd "ln(R&D)"
la var ln_npat_alt_1000 "ln(patents)"
la var ln_no_patold_cl_1000 "ln(otech)"
la var ln_no_patnew_cl_1000 "ln(ntech)"
la var ln_no_patnew_class_1000 "ln(nclass)"

* OLS with firm and year fixed effects, clustered by state.
* Fix: reghdfe does not post e(r2_p), so the ra_* scalars printed further
* down in the do-file were missing. e(r2) is stored instead.
reghdfe  ln_rd  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
scalar ra_ln_rd = e(r2)
est store a5
reghdfe  ln_npat_alt_1000  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
scalar ra_ln_npat_alt_1000 = e(r2)
est store b5
reghdfe  ln_no_patold_cl_1000  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
scalar ra_ln_no_patold_cl_1000 = e(r2)
est store c5
reghdfe  ln_no_patnew_cl_1000  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
scalar ra_ln_no_patnew_cl_1000 = e(r2)
est store d5
* e5 (new classes) is estimated and stored but not included in the table below.
reghdfe  ln_no_patnew_class_1000  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
est store e5
scalar ra_ln_no_patnew_class_1000 = e(r2)



esttab a5 b5 c5 d5 ,	r se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  ///
					keep (l1rd_efr_hi)  sfmt(%10.0f) noobs ///
					order (l1rd_efr_hi) title("Tax credits and patents log(y) if y>0")  label	type nogap compress noeqli ///
					addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") 				
	
* Number of observations and firms for the log specifications, plus the fit
* statistic saved in ra_<outcome> after the corresponding reghdfe run.
* Fix: the first display line referenced an undefined local v and printed an
* empty outcome name; it now uses the loop variable y.
foreach y in ln_rd ln_npat_alt_1000 ln_no_patold_cl_1000 ln_no_patnew_cl_1000  {
qui reg `y' l1rd_efr_hi  if first_tax<=1991, cluster(sid)  
gen fullsample_a=e(sample)
* Nsample is constant across rows and equals the number of used observations
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `y' : `r(max)'"
* consecutive firm ids within the sample; the largest id is the firm count
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" ra_`y' 
}				
	
* inverse hyperbolic sine transformation (defined at zero, unlike the log)
gen a_xrd = asinh(xrd_1000)
gen a_npat_alt_1000  = asinh(npat_alt_1000) 
gen a_no_patold_cl_1000  = asinh(no_patold_cl_1000 )			
gen a_no_patnew_cl_1000  = asinh( npat_new5_50_2020 )	
gen a_npat_new5_2020  = asinh(npat_new5_2020)
gen a_exploit_ratio = a_no_patold_cl_1000 - a_no_patnew_cl_1000
gen a_tp_raw5_2020 = asinh(tp_raw5_2020)

la var a_xrd "R&D"
la var a_npat_alt_1000 "Patents"
la var a_no_patold_cl_1000 "Old tech"
la var a_no_patnew_cl_1000 "New tech"
la var a_npat_new5_2020 "New class"

* OLS with firm and year fixed effects, clustered by state. Estimates overwrite
* a5..d5.
* Fix: reghdfe does not post e(r2_p), so the ra_* scalars were missing.
* e(r2) is stored instead.
reghdfe  a_xrd  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
scalar ra_a_xrd = e(r2)
est store a5
reghdfe  a_npat_alt_1000  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
scalar ra_a_npat_alt_1000 = e(r2)
est store b5
reghdfe  a_no_patold_cl_1000  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
scalar ra_a_no_patold_cl_1000 = e(r2)
est store c5
reghdfe  a_no_patnew_cl_1000  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
scalar ra_a_no_patnew_cl_1000 = e(r2)
est store d5

esttab a5 b5 c5 d5 ,	r se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  ///
					keep (l1rd_efr_hi) sfmt(%10.0f) noobs ///
					order (l1rd_efr_hi) title("Tax credits and patents, inverse hyperbolic transformation")  label	type nogap compress noeqli ///
					addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") 			
			
				
****************************************************************************************************************************************
* TABLE A5: OLS binary treatment
****************************************************************************************************************************************

* Panel A

					
* Table A5, panel A: OLS of each outcome on the lagged binary treatment with
* firm and year fixed effects, clustered by state. Estimates are stored as
* a_<outcome>.
* Fix: reghdfe does not post e(r2_p), so ra_<outcome> was missing when printed
* later in the do-file. e(r2) is stored instead.
foreach y in xrd_1000 npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
reghdfe `y' l1_treat if first_tax<=1991 , absorb(gvkey year) cluster(sid)
estimates store a_`y'
scalar ra_`y' = e(r2)
* outcome mean within the estimation sample
sum `y' if e(sample) 
}

esttab a_xrd_1000 a_npat_alt_1000 a_no_patold_cl_1000 a_npat_new5_50_2020, 	r se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  ///
					drop (_cons) sfmt(%10.0f) noobs ///
					 title("Tax credits, R&D and patenting, linear models")  label	type nogap compress noeqli ///
					addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") coeflabels(l1_treat "TaxCreditEvent") 							

* Number of observations and firms for Table A5, panel A, plus the fit
* statistic saved in ra_<outcome> after the corresponding reghdfe run.
* Fixes: (1) the first display line referenced an undefined local v -- it now
* uses the loop variable y; (2) the auxiliary regression used l1_streat, while
* the panel A estimations use l1_treat; the regressor now matches, so that the
* counted sample has the same missing-value pattern as the table.
foreach y in xrd_1000 npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
qui reg `y' l1_treat  if first_tax<=1991, cluster(sid)  
gen fullsample_a=e(sample)
* Nsample is constant across rows and equals the number of used observations
egen Nsample = sum(fullsample_a)
qui sum Nsample
di "N  for `y' : `r(max)'"
* consecutive firm ids within the sample; the largest id is the firm count
egen id_b=group(gvkey ) if fullsample_a==1
qui sum(id_b)
di "N firms for `y' : `r(max)'"
drop Nsample fullsample_a  id_b
di "Pseudo R2:" ra_`y' 
}	
					
		
* Panel B
					
* Numeric two-digit SIC code; capture keeps the run going if it already exists.
capture destring sic2, gen(sic2_num)

* Propensity score reweighting
* Outcome of the selection equation: a tax credit switches on in this year
* (treated now, not treated in the lagged indicator).
generate dtreat = (treat==1 & l1_treat==0)

* Controls; each line is skipped silently if the variable already exists.
capture generate lass = ln(at)
capture generate roa  = ebit / at
capture generate sic2_d = sic2_num

* Fold four two-digit industries into other codes for the industry dummies.
replace sic2_d = 10 if sic2_num == 12
replace sic2_d = 40 if sic2_num == 41
replace sic2_d = 59 if sic2_num == 55
replace sic2_d = 99 if sic2_num == 86

* Probit for the event and the predicted probability within its sample.
capture drop pscore
probit dtreat lass roa capx year i.sic2_d if first_tax<=1991 & npat_alt_1000!=.
predict pscore if e(sample), pr

* Remove helper variables left over from an earlier run. They are dropped one
* at a time because a single drop with a list fails as a whole (and under
* capture would then drop nothing) if any one of the names does not exist.
foreach v in  pscore_sample pscore_f treat_f treat_weight f_weight fcontrol_weight control_reweight fcontrol_reweight reweight {
capture drop `v'
}

* Propensity-score matching on the same covariates as the probit above.
* The variables _treated and _weight used below are expected to be created
* by this command.
psmatch2 dtreat lass roa capx year i.sic2_d  if first_tax<=1991, out(npat_alt_1000)

* Firm-level flags: pscore_f = firm has at least one row in the matching
* sample; treat_f = firm has at least one treated row.
gen pscore_sample = e(sample)
egen pscore_f = max(pscore_sample), by(gvkey)
egen treat_f = max(_treated), by(gvkey)

* Matching weights at the firm level: 1 for firms with a treated row, the
* firm's summed _weight for firms without one.
gen treat_weight = 1 if _treated==1
egen f_weight = max(treat_weight), by(gvkey)
egen fcontrol_weight = sum(_weight), by(gvkey)
replace f_weight = fcontrol_weight if treat_f==0

* Odds-based reweighting: firms with a treated row get weight 1 (treat_f),
* other firms get the sum over their rows of pscore/(1-pscore).
* reweight is the variable used as pweight in the regressions that follow.
gen reweight = treat_f
gen control_reweight = pscore / (1-pscore) if  treat_f==0
egen fcontrol_reweight = sum(control_reweight) if treat_f==0, by(gvkey)
replace reweight = fcontrol_reweight if treat_f==0

* Poisson / exponential-mean models with firm and year FE, weighted by the
* propensity-score weights in reweight; pseudo R2 of each model kept in ra_<outcome>.
ppmlhdfe  xrd_d  l1_treat [pweight = reweight] if first_tax<=1991 ,  cluster(sid) absorb(gvkey year)
est store a					
scalar ra_xrd_d = e(r2_p)
ppmlhdfe  npat_alt_1000  l1_treat [pweight = reweight] if first_tax<=1991 ,  cluster(sid) absorb(gvkey year)					
est store b
scalar ra_npat_alt_1000 = e(r2_p)
ppmlhdfe  no_patold_cl_1000  l1_treat [pweight = reweight] if first_tax<=1991 ,  cluster(sid) absorb(gvkey year)					
est store c
scalar ra_no_patold_cl_1000 = e(r2_p)
ppmlhdfe  npat_new5_50_2020  l1_treat [pweight = reweight] if first_tax<=1991 ,  cluster(sid) absorb(gvkey year)					
est store d
scalar ra_npat_new5_50_2020 = e(r2_p)

esttab a b c d  , 	replace pr2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  ///
					drop(_cons) sfmt(%10.0f) noobs ///
					title("Tax credits, R&D and patenting, Poisson / exponential mean regression, pscore reweighted estimates")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") coeflabels(l1_treat "TaxCreditEvent") 				

* Number of observations and of distinct firms per outcome.
* NOTE(review): the counting regression is unweighted, restricted to the event window
* and run on xrd_1000, while the models above are weighted, unrestricted and use xrd_d - confirm intended.
foreach y in xrd_1000 npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
	* the R&D model above stores its pseudo R2 as ra_xrd_d, not ra_xrd_1000
	local r2name `y'
	if "`y'" == "xrd_1000" local r2name xrd_d
	qui reg `y' l1_treat  l1_streat if first_tax<=1991 & ((year>=$cohort2 - $first & year<= $cohort2 + $last) | $cohort2==0), cluster(sid)  
	gen fullsample_a=e(sample)
	egen Nsample = sum(fullsample_a)
	qui sum Nsample
	* label with the loop macro y (the original referenced a macro that is not defined here)
	di "N  for `y' : `r(max)'"
	egen id_b=group(gvkey ) if fullsample_a==1
	qui sum(id_b)
	di "N firms for `y' : `r(max)'"
	drop Nsample fullsample_a  id_b
	di "Pseudo R2:" ra_`r2name' 
}				
		
* OLS counterpart of the reweighted models (firm and year FE, pscore weights).
* reghdfe stores e(r2) and has no e(r2_p); the scalars therefore read e(r2)
* (with e(r2_p) they were all set to missing).
reghdfe  xrd_1000  l1_treat [pweight = reweight] if first_tax<=1991 ,  cluster(sid) absorb(gvkey year)
est store a					
scalar ra_xrd_1000 = e(r2)
reghdfe  npat_alt_1000  l1_treat [pweight = reweight] if first_tax<=1991 ,  cluster(sid) absorb(gvkey year)					
est store b
scalar ra_npat_alt_1000 = e(r2)
reghdfe  no_patold_cl_1000  l1_treat [pweight = reweight] if first_tax<=1991 ,  cluster(sid) absorb(gvkey year)					
est store c
scalar ra_no_patold_cl_1000 = e(r2)
reghdfe  npat_new5_50_2020  l1_treat [pweight = reweight] if first_tax<=1991 ,  cluster(sid) absorb(gvkey year)					
est store d
scalar ra_npat_new5_50_2020 = e(r2)


esttab a b c d , 	replace r2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  ///
					drop(_cons) sfmt(%10.0f) noobs ///
					title("Tax credits, R&D and patenting, OLS, pscore reweighted estimates")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effects") coeflabels(l1_treat "TaxCreditEvent") 				
					
* Number of observations and of distinct firms per outcome (weighted regression without FE)
foreach y in xrd_1000 npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
	qui reg `y' l1_treat  [pweight = reweight] if first_tax<=1991, cluster(sid)  
	gen fullsample_a=e(sample)
	egen Nsample = sum(fullsample_a)
	qui sum Nsample
	* label with the loop macro y (the original referenced a macro that is not defined here)
	di "N  for `y' : `r(max)'"
	egen id_b=group(gvkey ) if fullsample_a==1
	qui sum(id_b)
	di "N firms for `y' : `r(max)'"
	drop Nsample fullsample_a  id_b
	di "R2:" ra_`y' 
}	
														
* Event time: calendar year minus the variable named in the global cohort
capture drop ry
generate ry = year - $cohort

* Lead dummies g_1 ... g_first: equal to 1 exactly k years before the event
forvalues lead = 1/$first {
	capture drop g_`lead'
	generate g_`lead' = (ry == -`lead')
	label variable g_`lead' "-`lead'"
}

* Lag dummies g0 ... g<last>: equal to 1 exactly k years after the event
forvalues lag = 0/$last {
	capture drop g`lag'
	generate g`lag' = (ry == `lag')
	label variable g`lag' "`lag'"
}


* Start from empty lists so that a re-run does not append to old contents
foreach m in varlistpre varlistpost varlistpostl vpre vpost vpostl {
	global `m'
}

* Pre-period terms, from the earliest lead down to lead 2 (lead 1 is left out)
forvalues lead = $first (-1)2 {
	global vpre $vpre g_`lead'
	global varlistpre $varlistpre i.$cohort#c.g_`lead'
}

* Post-period terms: the "post" lists start at lag 0, the "postl" lists at lag 1
forvalues lag = 0/$last {
	global vpost $vpost g`lag'
	global varlistpost $varlistpost i.$cohort#c.g`lag'
	if `lag' >= 1 {
		global vpostl $vpostl g`lag'
		global varlistpostl $varlistpostl i.$cohort#c.g`lag'
	}
}
										
* Panel C
										
* Wooldridge estimator
* Calculate the overall time-averaged treatment effect from OLS:
* cohort-by-period coefficients are averaged with weights equal to each cell's
* share of the post-event observations in the estimation sample.
foreach y in xrd_1000 npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
	qui reghdfe `y'    $varlistpostl  if first_tax<=1991 & ((year>=$cohort - $first & year<= $cohort + $last) | $cohort==0) , ///
		absorb(gvkey year) vce(cluster sid) keepsingle 

	scalar rb_`y' = e(r2)
	capture drop wsample
	gen wsample = e(sample)

	qui {
		* denominator: post-event observations of firms with a non-zero cohort value
		count if wsample==1 & year>$cohort & $cohort!=0
		local N = r(N)
		* cohort values present in that sample (does not vary across periods, so computed once)
		levelsof $cohort if wsample==1 & year>$cohort & $cohort!=0, local(level)
		* build the weighted sum as an expression string for nlcom
		local att 0
		foreach v in  $vpostl  {
			foreach w in `level' {
				count if wsample ==1 & `v'==1 & $cohort ==`w'
				local s = r(N)
				local att `att' + _b[`w'.$cohort#c.`v']*`s' /`N'
			}
		}
	}
	* delta-method standard error of the weighted average; post makes it storable
	nlcom (att: `att' ) , post
	est store e_`y'					
}	

esttab e_xrd_1000 e_npat_alt_1000 e_no_patold_cl_1000 e_npat_new5_50_2020 , replace  b(3) se(3)   starlevels(* 0.10 ** 0.05 *** 0.01)  sfmt(%10.0f) noobs

* Number of observations and of distinct firms per outcome (regression without FE on the same window)
foreach y in xrd_1000 npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
	qui reg `y'   $varlistpostl   if first_tax<=1991 & ((year>=$cohort - $first & year<= $cohort + $last) | $cohort==0), cluster(sid)  
	gen fullsample_a=e(sample)
	egen Nsample = sum(fullsample_a)
	qui sum Nsample
	* label with the loop macro y (the original referenced a macro that is not defined here)
	di "N  for `y' : `r(max)'"
	egen id_b=group(gvkey ) if fullsample_a==1
	qui sum(id_b)
	di "N firms for `y' : `r(max)'"
	drop Nsample fullsample_a  id_b
	* rb_ holds e(r2) of the linear reghdfe model above
	di "R2:" rb_`y' 
}		

* Panel D

* Table: Borusyak et al. (BJS) DiD imputation estimator
* Event-date variable for did_imputation: a copy of first_taxB with zeros set to missing
capture drop first_tax_m
gen first_tax_m = first_taxB if first_taxB != 0

* One imputation estimate per outcome, stored as x1 ... x4 in the order listed
local col = 0
foreach y in xrd_1000 npat_alt_1000 no_patold_cl_1000 npat_new5_50_2020 {
	local ++col
	did_imputation  `y' gvkey year first_tax_m if first_tax<=1991 ,    cluster(sid) fe(gvkey year) autosample
	est store x`col'
}

esttab x1 x2 x3 x4 , replace	b(3) se(3)  starlevels(* 0.10 ** 0.05 *** 0.01)  sfmt(%10.0f) noobs ///
					title("BJS DiD imputation estimator, estimates in levels") compress label coeflabels(tau "TaxCreditEvent")
	 
****************************************************************************************************************************************
* TABLE A6: Different window length for definition of new technologies using alternative citation cutoffs
****************************************************************************************************************************************
	
* alternative measures of fraction new					
* Fraction of new-technology patents under alternative cutoffs (firm and year FE).
* The R2 scalar is indexed by the loop macro v; the original used a macro that is
* not defined in these loops, so every iteration overwrote one scalar named ra_.
foreach v in _50_2020    _75_2020  _90_2020  _2020	{
	reghdfe  fr_npat_new5`v'  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
	est store x_`v'
	scalar ra_`v' = e(r2)
}				

* Same models for the class-based cutoffs
foreach v in    _50_class_2020  _75_class_2020  _90_class_2020 _max_2020	{
	reghdfe  fr_npat_new5`v'  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
	est store y_`v'
	scalar ra_`v' = e(r2)
}							
					
* Poisson models for counts of new-technology patents under each cutoff.
* Estimates are stored as a ... h in the order of the suffix list, and the
* pseudo R2 of each model as ra_npat_new5<suffix>.
local est_names a b c d e f g h
local i = 0
foreach s in _50_2020 _50_class_2020 _75_2020 _75_class_2020 _90_2020 _90_class_2020 _max_2020 _2020 {
	local ++i
	local nm : word `i' of `est_names'
	ppmlhdfe  npat_new5`s'  l1rd_efr_hi if first_tax<=1991,  cluster(sid) absorb(gvkey year)		d
	est store `nm'
	scalar ra_npat_new5`s' = e(r2_p)
}


esttab a  c  e  h  ///
, replace  se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01) ///
					keep (l1rd_efr_hi) sfmt(%10.0f) noobs ///
					title("Tax credits, R&D and patenting, Poisson / exponential mean regression")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") coeflabels(l1rd_efr_hi "Tax credit t-1") 				

* Number of observations and of distinct firms for the four models tabulated above
foreach y in npat_new5_50_2020 npat_new5_75_2020 npat_new5_90_2020 npat_new5_2020 {
	qui poisson `y'   l1rd_efr_hi   if first_tax<=1991 , cluster(sid)  
	gen fullsample_a=e(sample)
	egen Nsample = sum(fullsample_a)
	qui sum Nsample
	* label with the loop macro y (the original referenced a macro that is not defined here)
	di "N  for `y' : `r(max)'"
	egen id_b=group(gvkey ) if fullsample_a==1
	qui sum(id_b)
	di "N firms for `y' : `r(max)'"
	drop Nsample fullsample_a  id_b
	di "Pseudo R2:" ra_`y' 
}		
					
* Table of the fraction-new models stored as x_<suffix>
esttab x_*,	r  se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  ///
					drop (_cons) sfmt(%10.0f) noobs ///
					 title("Tax credits and fraction new techs")  label	type nogap compress noeqli ///
					addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") 				

* Re-estimate quietly so that ra_<suffix> holds the R2 printed by the count loop below
foreach v in _50_2020    _75_2020  _90_2020  _2020	{
	qui reghdfe  fr_npat_new5`v'  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)					
	est store x_`v'
	scalar ra_`v' = e(r2)
}	
					
					
* Table of the Poisson models for the class-based cutoffs (stored as b d f g)
esttab b  d  f g   ///
, replace  se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  ///
					keep (l1rd_efr_hi) sfmt(%10.0f) noobs ///
					title("Tax credits, R&D and patenting, Poisson / exponential mean regression")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") coeflabels(l1rd_efr_hi "Tax credit t-1") 				

* Number of observations and of distinct firms for the fraction-new models
foreach y in _50_2020    _75_2020  _90_2020  _2020 { 
	qui reg fr_npat_new5`y'   l1rd_efr_hi   if first_tax<=1991 , cluster(sid)  
	gen fullsample_a=e(sample)
	egen Nsample = sum(fullsample_a)
	qui sum Nsample
	* label with the loop macro y (the original referenced a macro that is not defined here)
	di "N  for `y' : `r(max)'"
	egen id_b=group(gvkey ) if fullsample_a==1
	qui sum(id_b)
	di "N firms for `y' : `r(max)'"
	drop Nsample fullsample_a  id_b
	* ra_ holds e(r2) of the linear reghdfe model
	di "R2:" ra_`y' 
}	
					
* Table of the fraction-new models for the class-based cutoffs (stored as y_<suffix>)
esttab y_* , r se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  ///
					drop (_cons) sfmt(%10.0f) noobs ///
					 title("Tax credits and fraction new techs")  label	type nogap compress noeqli ///
					addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") 				

* Number of observations, number of firms and R2 per class-based cutoff.
* The loop macro is v; the original printed a macro that is not defined in this
* loop, which left the firm label blank and showed the same scalar four times.
foreach v in    _50_class_2020  _75_class_2020  _90_class_2020 _max_2020	{
	* R2 of the fixed-effects model for this cutoff, recomputed here so the
	* display below does not depend on a scalar set elsewhere
	qui reghdfe  fr_npat_new5`v'  l1rd_efr_hi if first_tax<=1991, cluster(sid) absorb(gvkey year)
	scalar ra_`v' = e(r2)
	qui reg  fr_npat_new5`v'  l1rd_efr_hi if first_tax<=1991, cluster(sid) 					
	gen fullsample_a=e(sample)
	egen Nsample = sum(fullsample_a)
	qui sum Nsample
	di "N  for `v' : `r(max)'"
	egen id_b=group(gvkey ) if fullsample_a==1
	qui sum(id_b)
	di "N firms for `v' : `r(max)'"
	drop Nsample fullsample_a  id_b
	di "R2:" ra_`v' 
}		
					
										
****************************************************************************************************************************************
* TABLE A7: 10 and 15 -year windows for definition of new technologies
***************************************************************************************************************************************** 

* 15-year window: swap in the patent counts from the 15y file, re-estimate, then
* restore the original data.
preserve 

capture drop npat_old5_50_2020 
capture drop npat_alt_1000
capture drop npat_new5_50_2020
capture drop no_patold_cl_1000

merge 1:1 gvkey year using "${MY_IN_PATH}/techprox_newptas20221003_pdate_15y.dta", keep(master matched) gen(m15)

* firm-years without a match in the using file count as zero patents
recode npat_old5_50_2020 npat_new5_50_2020 (.=0)
* old-technology patents capped at 1000; total = new + capped old
gen no_patold_cl_1000 = min(1000, npat_old5_50_2020) if npat_old5_50_2020!=.
gen npat_alt_1000 = npat_new5_50_2020 + no_patold_cl_1000


ppmlhdfe  no_patold_cl_1000  l1rd_efr_hi  if first_tax<=1991,  cluster(sid) absorb(gvkey year)					
est store c
scalar ra_no_patold_cl_1000 = e(r2_p)
ppmlhdfe  npat_new5_50_2020  l1rd_efr_hi   if first_tax<=1991,  cluster(sid) absorb(gvkey year)					
est store d
scalar ra_npat_new5_50_2020 = e(r2_p)

* coeflabels now names the regressor that is actually in the model (l1rd_efr_hi);
* the previous key l4rd_efr_hi matched no coefficient and was ignored
esttab c d  , append  pr2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  sfmt(%10.0f) noobs ///
                          drop(_cons) ///
					title("Tax credits, R&D and patenting, Poisson / exponential mean regression")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") coeflabels(l1rd_efr_hi "Tax credit t-1") 				
						
* Number of observations and of distinct firms per outcome
foreach y in no_patold_cl_1000  npat_new5_50_2020 { 
	qui poisson `y'   l1rd_efr_hi   if first_tax<=1991 , cluster(sid)  
	gen fullsample_a=e(sample)
	egen Nsample = sum(fullsample_a)
	qui sum Nsample
	* label with the loop macro y (the original referenced a macro that is not defined here)
	di "N  for `y' : `r(max)'"
	egen id_b=group(gvkey ) if fullsample_a==1
	qui sum(id_b)
	di "N firms for `y' : `r(max)'"
	drop Nsample fullsample_a  id_b
	di "Pseudo R2:" ra_`y' 
}	
						
restore 

*********************************************************************************					
* 10-year window: swap in the patent counts from the 10y file, re-estimate, then
* restore the original data.
preserve 

capture drop npat_old5_50_2020 
capture drop npat_alt_1000
capture drop npat_new5_50_2020
capture drop no_patold_cl_1000

merge 1:1 gvkey year using "${MY_IN_PATH}/techprox_newptas20221003_pdate_10y.dta", keep(master matched) gen(m10)

* firm-years without a match in the using file count as zero patents
recode npat_old5_50_2020 npat_new5_50_2020 (.=0)
* old-technology patents capped at 1000; total = new + capped old
gen no_patold_cl_1000 = min(1000, npat_old5_50_2020) if npat_old5_50_2020!=.
gen npat_alt_1000 = npat_new5_50_2020 + no_patold_cl_1000

ppmlhdfe  no_patold_cl_1000  l1rd_efr_hi  if first_tax<=1991,  cluster(sid) absorb(gvkey year)					
est store a
* store under the name of the outcome just estimated; the original wrote this value to
* ra_npat_new5_50_2020, so the count loop printed a stale ra_no_patold_cl_1000
scalar ra_no_patold_cl_1000 = e(r2_p)
ppmlhdfe  npat_new5_50_2020  l1rd_efr_hi   if first_tax<=1991,  cluster(sid) absorb(gvkey year)					
est store b
scalar ra_npat_new5_50_2020 = e(r2_p)

* coeflabels now names the regressor that is actually in the model (l1rd_efr_hi);
* the previous key l4rd_efr_hi matched no coefficient and was ignored
esttab a b , replace pr2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  sfmt(%10.0f) noobs ///
                          drop(_cons) ///
					title("Tax credits, R&D and patenting, Poisson / exponential mean regression")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") coeflabels(l1rd_efr_hi "Tax credit t-1") 				

* Number of observations and of distinct firms per outcome
foreach y in no_patold_cl_1000  npat_new5_50_2020 { 
	qui poisson `y'   l1rd_efr_hi   if first_tax<=1991 , cluster(sid)  
	gen fullsample_a=e(sample)
	egen Nsample = sum(fullsample_a)
	qui sum Nsample
	* label with the loop macro y (the original referenced a macro that is not defined here)
	di "N  for `y' : `r(max)'"
	egen id_b=group(gvkey ) if fullsample_a==1
	qui sum(id_b)
	di "N firms for `y' : `r(max)'"
	drop Nsample fullsample_a  id_b
	di "Pseudo R2:" ra_`y' 
}	
					
				
restore 


***************************************************************************************************************************************** 
* TABLE A8: nominal rate, Panel A
***************************************************************************************************************************************** 

* Poisson models with the nominal credit rate l1rd_nom_hi as regressor
* (firm and year FE, SEs clustered by sid); pseudo R2 kept in ra_<outcome>.
ppmlhdfe xrd_d l1rd_nom_hi if first_tax<=1991,  absorb(gvkey year) cluster(sid)
estimates store a
scalar ra_xrd_d = e(r2_p)
ppmlhdfe npat_alt_1000 l1rd_nom_hi if first_tax<=1991 ,  absorb(gvkey year) cluster(sid)					
estimates store b
scalar ra_npat_alt_1000 = e(r2_p)
ppmlhdfe  no_patold_cl_1000  l1rd_nom_hi if first_tax<=1991,  cluster(sid) absorb(gvkey year)					
est store c
scalar ra_no_patold_cl_1000 = e(r2_p)
ppmlhdfe  npat_new5_50_2020  l1rd_nom_hi if first_tax<=1991,  cluster(sid) absorb(gvkey year)					
est store d
scalar ra_npat_new5_50_2020 = e(r2_p)

esttab a b c d , replace pr2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  sfmt(%10.0f) noobs keep(l1rd_nom_hi) ///
					title("Tax credits, R&D and patenting, Poisson / exponential mean regression")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") coeflabels(l1rd_nom_hi "Nominal tax credit rate, t-1") 				
	
* Number of observations and of distinct firms per outcome
foreach y in xrd_d  npat_alt_1000  no_patold_cl_1000 npat_new5_50_2020  { 
	qui poisson `y'   l1rd_nom_hi   if first_tax<=1991 , cluster(sid)  
	gen fullsample_a=e(sample)
	egen Nsample = sum(fullsample_a)
	qui sum Nsample
	* label with the loop macro y (the original referenced a macro that is not defined here)
	di "N  for `y' : `r(max)'"
	egen id_b=group(gvkey ) if fullsample_a==1
	qui sum(id_b)
	di "N firms for `y' : `r(max)'"
	drop Nsample fullsample_a  id_b
	di "Pseudo R2:" ra_`y' 
}	
	
***************************************************************************************************************************************** 
* TABLE A8, Panel B&C: user costs
***************************************************************************************************************************************** 

* Poisson models with rho_high as regressor (firm and year FE, SEs clustered by sid);
* pseudo R2 kept in ra_<outcome>.
ppmlhdfe xrd_d rho_high if first_tax<=1991,  absorb(gvkey year) cluster(sid)
estimates store a
scalar ra_xrd_d = e(r2_p)
ppmlhdfe npat_alt_1000 rho_high if first_tax<=1991 ,  absorb(gvkey year) cluster(sid)					
estimates store b
scalar ra_npat_alt_1000 = e(r2_p)
ppmlhdfe  no_patold_cl_1000  rho_high if first_tax<=1991,  cluster(sid) absorb(gvkey year)					
est store c
scalar ra_no_patold_cl_1000 = e(r2_p)
ppmlhdfe  npat_new5_50_2020  rho_high if first_tax<=1991,  cluster(sid) absorb(gvkey year)					
est store d
scalar ra_npat_new5_50_2020 = e(r2_p)

* The former coeflabels entry named l4rd_efr_hi, which is not in these models and
* was ignored; it is removed, so the row keeps the variable label of rho_high.
esttab a b c d , replace pr2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  ///
																			sfmt(%10.0f) noobs									drop(_cons)	///
					title("Tax credits, R&D and patenting, Poisson / exponential mean regression")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs")

* Number of observations and of distinct firms per outcome
foreach y in xrd_d  npat_alt_1000  no_patold_cl_1000 npat_new5_50_2020  { 
	qui poisson `y'   rho_high   if first_tax<=1991 , cluster(sid)  
	gen fullsample_a=e(sample)
	egen Nsample = sum(fullsample_a)
	qui sum Nsample
	* label with the loop macro y (the original referenced a macro that is not defined here)
	di "N  for `y' : `r(max)'"
	egen id_b=group(gvkey ) if fullsample_a==1
	qui sum(id_b)
	di "N firms for `y' : `r(max)'"
	drop Nsample fullsample_a  id_b
	di "Pseudo R2:" ra_`y' 
}	
					
* Log transform of the regressor; missing wherever rho_high is not above 1.
* NOTE(review): confirm that subtracting 1 before taking logs is the intended definition.
* capture drop keeps this section re-runnable within one session.
capture drop ln_rho
gen ln_rho = ln(rho_high - 1)					
					
* user costs
ppmlhdfe xrd_d ln_rho if first_tax<=1991,  absorb(gvkey year) cluster(sid)
estimates store a
scalar ra_xrd_d = e(r2_p)
ppmlhdfe npat_alt_1000 ln_rho if first_tax<=1991 ,  absorb(gvkey year) cluster(sid)					
estimates store b
scalar ra_npat_alt_1000 = e(r2_p)
ppmlhdfe  no_patold_cl_1000  ln_rho if first_tax<=1991,  cluster(sid) absorb(gvkey year)					
est store c
scalar ra_no_patold_cl_1000 = e(r2_p)
ppmlhdfe  npat_new5_50_2020  ln_rho if first_tax<=1991,  cluster(sid) absorb(gvkey year)					
est store d
scalar ra_npat_new5_50_2020 = e(r2_p)

* The former coeflabels entry named l4rd_efr_hi, which is not in these models and
* was ignored; it is removed, so the printed table is unchanged.
esttab a b c d, replace pr2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  ///
																sfmt(%10.0f) noobs												drop(_cons)	///
					title("Tax credits, R&D and patenting, Poisson / exponential mean regression")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs")
					
* Number of observations and of distinct firms per outcome
foreach y in xrd_d  npat_alt_1000  no_patold_cl_1000 npat_new5_50_2020  { 
	qui poisson `y'   ln_rho   if first_tax<=1991 , cluster(sid)  
	gen fullsample_a=e(sample)
	egen Nsample = sum(fullsample_a)
	qui sum Nsample
	* label with the loop macro y (the original referenced a macro that is not defined here)
	di "N  for `y' : `r(max)'"
	egen id_b=group(gvkey ) if fullsample_a==1
	qui sum(id_b)
	di "N firms for `y' : `r(max)'"
	drop Nsample fullsample_a  id_b
	di "Pseudo R2:" ra_`y' 
}	
					
					
***************************************************************************************************************************************** 
* TABLE A9: controlling for industry trends
***************************************************************************************************************************************** 

* Poisson models absorbing firm FE and 2-digit-industry-by-year FE
* (SEs clustered by sid); pseudo R2 kept in ra_<outcome>.
ppmlhdfe xrd_d l1rd_efr_hi if first_tax<=1991,  absorb(gvkey i.sic2_num#year) cluster(sid) d
estimates store a
scalar ra_xrd_d = e(r2_p)
ppmlhdfe npat_alt_1000 l1rd_efr_hi if first_tax<=1991 ,  absorb(gvkey i.sic2_num#year) cluster(sid)	d				
estimates store b
scalar ra_npat_alt_1000 = e(r2_p)
ppmlhdfe  no_patold_cl_1000  l1rd_efr_hi if first_tax<=1991,  cluster(sid) absorb(gvkey i.sic2_num#year)	 d				
est store c
scalar ra_no_patold_cl_1000 = e(r2_p)
ppmlhdfe  npat_new5_50_2020  l1rd_efr_hi if first_tax<=1991,  cluster(sid) absorb(gvkey i.sic2_num#year)		d			
est store d
scalar ra_npat_new5_50_2020 = e(r2_p)

esttab a b c d , replace pr2 se b(%10.3f)  starlevels(* 0.10 ** 0.05 *** 0.01)  ///
					keep (l1rd_efr_hi) sfmt(%10.0f) noobs ///
					title("Tax credits, R&D and patenting, Poisson / exponential mean regression")  label	type nogap compress noeqli addn("Standard errors clustered at the state level" "All regression include firm and year fixed effecs") coeflabels(l1rd_efr_hi "Tax credit t-1") 				

* Number of observations and of distinct firms per outcome, restricted to
* observations with a non-missing industry code
foreach y in xrd_d  npat_alt_1000  no_patold_cl_1000 npat_new5_50_2020  { 
	qui poisson `y'   l1rd_efr_hi   if first_tax<=1991 & sic2_num!=. , cluster(sid)  
	gen fullsample_a=e(sample)
	egen Nsample = sum(fullsample_a)
	qui sum Nsample
	* label with the loop macro y (the original referenced a macro that is not defined here)
	di "N  for `y' : `r(max)'"
	egen id_b=group(gvkey ) if fullsample_a==1
	qui sum(id_b)
	di "N firms for `y' : `r(max)'"
	drop Nsample fullsample_a  id_b
	di "Pseudo R2:" ra_`y' 
}	


* Close the open log file (NOTE(review): presumably opened earlier in this do-file - confirm)
log close
					
* Stop executing the do-file here
exit

 